package tv.dyndns.kishibe.qmaclone.server.relevance;

import java.util.Deque;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import tv.dyndns.kishibe.qmaclone.server.util.Normalizer;

import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Queues;
import com.google.common.collect.Sets;
import com.google.inject.Inject;

public class PatternMatchingAutomaton {

	private class PMA {
		public Map<Character, PMA> next = Maps.newHashMap();
		public List<String> accept = Lists.newArrayList();

		@Override
		public String toString() {
			return Objects.toStringHelper(this).add("next", next).add("accept", accept).toString();
		}
	}

	private static final Logger logger = Logger.getLogger(PatternMatchingAutomaton.class.getName());
	private final Set<Dictionary> dictionaries;
	private PMA pma;

	@Inject
	public PatternMatchingAutomaton(Set<Dictionary> dictionaries) {
		this.dictionaries = Preconditions.checkNotNull(dictionaries);
	}

	public List<String> segment(String sentence) {
		if (pma == null) {
			Set<String> words = Sets.newHashSet();
			for (Dictionary dictionary : dictionaries) {
				words.addAll(dictionary.getWords());
			}
			pma = build(words);
		}

		sentence = Normalizer.normalize(sentence).replaceAll("　", "").replaceAll(" ", "");

		List<String> words = Lists.newArrayList();
		PMA v = pma;
		for (char c : sentence.toCharArray()) {
			while (!v.next.containsKey(c))
				v = v.next.get('\0');
			v = v.next.get(c);
			words.addAll(v.accept);
		}

		return words;
	}

	private PMA build(Set<String> words) {
		int counter = 0;

		PMA root = new PMA();
		for (String word : words) {
			if (++counter % 10000 == 0) {
				logger.log(Level.INFO, "trie: " + counter);
			}

			// make trie
			PMA t = root;
			for (char c : word.toCharArray()) {
				if (!t.next.containsKey(c))
					t.next.put(c, new PMA());
				t = t.next.get(c);
			}
			t.accept.add(word);
		}

		counter = 0;

		Deque<PMA> Q = Queues.newArrayDeque();
		// make failure link using bfs
		for (char c = 'a'; c <= 'z'; ++c) {
			if (root.next.containsKey(c)) {
				root.next.get(c).next.put('\0', root);
				Q.push(root.next.get(c));
			} else {
				root.next.put(c, root);
			}
		}
		while (!Q.isEmpty()) {
			if (++counter % 10000 == 0) {
				logger.log(Level.INFO, "failure link: " + counter);
			}

			PMA t = Q.removeFirst();
			for (char c = 'a'; c <= 'z'; ++c) {
				if (t.next.containsKey(c)) {
					Q.addLast(t.next.get(c));
					PMA r = t.next.get('\0');
					while (!r.next.containsKey(c))
						r = r.next.get('\0');
					t.next.get(c).next.put('\0', r.next.get(c));
					t.next.get(c).accept.addAll(t.next.get(c).next.get('\0').accept);
				}
			}
		}
		return root;
	}

}
